from bs4 import BeautifulSoup
import requests


# Entry page to scrape, with spoofed browser headers so the server does not
# reject the request as a bot.
head_url = "https://www.sina.com.cn/"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36"
}


# Fetch the landing page. Fixes: typo `reaponse` -> `response`; added a
# timeout so a stalled connection cannot hang the script forever; fail fast
# on HTTP error status instead of silently parsing an error page.
response = requests.get(head_url, headers=headers, timeout=10)
response.raise_for_status()
soup = BeautifulSoup(response.text, "lxml")

# NOTE(review): '.xxxx script' looks like a placeholder selector, and the
# click-count endpoint below belongs to tyut.edu.cn while head_url is
# sina.com.cn — confirm the intended target site.
scripts = soup.select('.xxxx script')
if not scripts:
    raise SystemExit("selector '.xxxx script' matched nothing on the page")
# The matched <script> text is a call like fn(a, b, c, ...); strip spaces and
# the trailing ')' and split on ',' to recover the arguments.
x = scripts[0].text.replace(' ', '').strip(')').split(',')

# Per the dynclicks query string: x[2] fills `clickid`, x[1] fills `owner`.
resulturl = (
    'http://www2017.tyut.edu.cn/system/resource/code/news/click/'
    'dynclicks.jsp?clickid={}&owner={}&clicktype=wbnews'.format(x[2], x[1])
)
# The endpoint returns the click count as a tiny HTML/text fragment.
count = BeautifulSoup(requests.get(resulturl, timeout=10).text, 'html.parser')

# import urllib.request as urllib2
# import requests
# import sys
# import re
# import os
#
#
# # ***********function define************#
# def extract_url(info):
#     rege = "<li><span class=\"title\"><a href=\"(.*?)\">"  # non-greedy match
#     re_url = re.findall(rege, info.decode('utf-8'))
#     n = len(re_url)
#     for i in range(0, n):
#         re_url[i] = "http://news.swjtu.edu.cn/" + re_url[i]
#     return re_url
#
#
# def extract_title(sub_web):
#     re_key = "<h4>\r\n                    (.*)\r\n                    </h4>"
#     title = re.findall(re_key, sub_web)
#
#     return title
#
#
# def extract_date(sub_web):
#     re_key = "日期：(.*?)    "
#     date = re.findall(re_key, sub_web)
#     return date
#
#
# def extract_counts(sub_web):
#     re_key = "点击数：(.*?)  "
#     counts = re.findall(re_key, sub_web)
#     return counts
#
#
# # *************main**************#
# fp = open('output.txt', 'w')
# # content = urllib2.urlopen('https://www.bilibili.com/?spm_id_from=666.4.b_696e7465726e6174696f6e616c486561646572.1').read()
# url = 'https://news.swjtu.edu.cn/ShowList-82-0-1.shtml'
# content = requests.get(url).content
# url = extract_url(content)
# string = ""
# n = len(url)
# print(n)
# for i in range(0, n):
#     sub_web = urllib2.urlopen(url[i]).read()
#     sub_title = extract_title(sub_web)
#     string += sub_title[0]
#     string += '   '
#     sub_date = extract_date(sub_web)
#     string += "日期：" + sub_date[0]
#     string += '   '
#     sub_counts = extract_counts(sub_web)
#     string += "点击数：" + sub_counts[0]
#     string += '\n'
#     # print string
# print(string)
# fp.close()
